#!/usr/bin/env python3
"""
Compute ratios of adjoint to fundamental string tensions using normalized
per‑plaquette values.  This script reads the summary CSV produced by
``vol4_wilson_loop_adjoint_volume_sweep/run.py`` (which contains
``string_tension`` and ``string_tension_err`` columns) and the one
produced by the fundamental lattice sweep module (which must contain
``string_tension`` and may contain ``string_tension_err``).  It then
merges the two tables on the sweep parameters and outputs a new CSV
containing the ratio and its propagated uncertainty.

Usage:

    python scripts/compute_adj_fund_ratio.py \
        --adjoint data/results/vol4_wilson_loop_adjoint_volume_sweep/adjoint_volume_summary.csv \
        --fundamental data/results/vol4_wilson_loop_pipeline_lattice_sweep/string_tension_summary.csv \
        --out data/results/adjoint_fund_ratio_summary.csv

The output CSV will contain the columns:

* ``b, k, n0, L, gauge`` – sweep parameters
* ``string_tension_adj`` – adjoint string tension
* ``string_tension_adj_err`` – adjoint string tension error
* ``string_tension_fund`` – fundamental string tension
* ``string_tension_fund_err`` – fundamental string tension error (0 if missing)
* ``ratio`` – adjoint / fundamental string tension
* ``ratio_err`` – propagated uncertainty on the ratio

Rows that appear in only one of the two tables, and rows where either
string tension is non‑positive, are dropped.
"""
import argparse
import os
import pandas as pd
import numpy as np


def load_and_normalize(path: str, prefix: str) -> pd.DataFrame:
    """Read a summary CSV and tag its string-tension columns with *prefix*.

    ``string_tension`` is renamed to ``string_tension_<prefix>`` and
    ``string_tension_err`` to ``string_tension_<prefix>_err``.  When the
    file has no error column, one is created and filled with ``0.0``.
    All other columns are passed through unchanged.

    Raises:
        ValueError: if the CSV has no ``string_tension`` column.
    """
    table = pd.read_csv(path)
    if "string_tension" not in table.columns:
        raise ValueError(f"{path} must contain a 'string_tension' column")

    value_name = f"string_tension_{prefix}"
    error_name = f"string_tension_{prefix}_err"

    # Rename the value column first; the error column is looked up on the
    # already-renamed frame.
    table = table.rename(columns={"string_tension": value_name})
    if "string_tension_err" in table.columns:
        return table.rename(columns={"string_tension_err": error_name})

    # No uncertainty supplied: treat the measurement as exact.
    table[error_name] = 0.0
    return table


def main() -> None:
    """Command-line entry point: merge both summaries and write the ratio CSV.

    Parses ``--adjoint``, ``--fundamental`` and ``--out``, loads both CSVs
    with ``load_and_normalize``, and inner-joins them on every adjoint
    column that is not a measurement (a name starting with
    ``string_tension`` or ending in ``_err``).  Rows with a non-positive
    tension are dropped, ``ratio`` and ``ratio_err`` columns are added, and
    the result is written to ``--out``.  A short summary of the mean ratio
    is printed to stdout.

    Raises:
        ValueError: if the adjoint CSV has no key columns to merge on, or
            if a key column is missing from the fundamental CSV.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--adjoint", required=True, help="Adjoint summary CSV")
    ap.add_argument("--fundamental", required=True, help="Fundamental summary CSV")
    ap.add_argument("--out", required=True, help="Output CSV for ratios")
    args = ap.parse_args()

    adj = load_and_normalize(args.adjoint, "adj")
    fund = load_and_normalize(args.fundamental, "fund")

    # Merge keys are the adjoint columns that are not measurements.
    measure_cols = {
        col
        for col in adj.columns
        if col.startswith("string_tension") or col.endswith("_err")
    }
    keys = [c for c in adj.columns if c not in measure_cols]
    if not keys:
        # Fail with a clear message instead of an opaque pandas merge error.
        raise ValueError("Adjoint CSV has no key columns to merge on")
    # The fundamental table must provide every key column.
    for k in keys:
        if k not in fund.columns:
            raise ValueError(f"Key column '{k}' missing from fundamental CSV")

    # Only the keys and the two fundamental measurement columns are taken
    # from the fundamental table; its other columns are not carried over.
    fund_cols = keys + ["string_tension_fund", "string_tension_fund_err"]
    merged = adj.merge(fund[fund_cols], on=keys, how="inner")

    # Keep strictly positive tensions only; this also discards NaN values
    # and guarantees the divisions below are well defined.
    positive = (merged["string_tension_adj"] > 0) & (
        merged["string_tension_fund"] > 0
    )
    merged = merged[positive].copy()

    merged["ratio"] = merged["string_tension_adj"] / merged["string_tension_fund"]
    # Uncorrelated error propagation for a quotient:
    # ratio_err = ratio * sqrt((err_adj/adj)^2 + (err_fund/fund)^2)
    rel_adj = merged["string_tension_adj_err"] / merged["string_tension_adj"]
    rel_fund = merged["string_tension_fund_err"] / merged["string_tension_fund"]
    merged["ratio_err"] = merged["ratio"] * np.sqrt(rel_adj ** 2 + rel_fund ** 2)

    # Save output.  A bare filename has an empty dirname, and
    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # directory when there is one.
    out_path = args.out
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    merged.to_csv(out_path, index=False)

    # Print simple statistics to stdout.  The per-gauge breakdown needs a
    # "gauge" column; fall back to the overall mean rather than crashing
    # after the CSV has already been written.
    if "gauge" in merged.columns:
        print(
            "Mean ratio by gauge (clean, normalized):",
            merged.groupby("gauge")["ratio"].mean().to_dict(),
        )
    else:
        print("Mean ratio (clean, normalized):", merged["ratio"].mean())


if __name__ == "__main__":
    main()